package com.trytech.mongoocrawler.client.parser.lianjia;

import com.alibaba.fastjson.JSONObject;
import com.trytech.mongoocrawler.client.common.queue.UrlFetcherEventProducer;
import com.trytech.mongoocrawler.client.parser.HtmlParser;
import com.trytech.mongoocrawler.client.transport.http.HttpBody;
import com.trytech.mongoocrawler.client.transport.http.UrlResult;
import com.trytech.mongoocrawler.client.transport.http.WebResult;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.nio.charset.Charset;

/**
 * Created by coliza on 2017/7/1.
 *
 * <p>Parser for a Lianjia listing page that reads the pagination metadata and
 * enqueues one URL per result page. Each enqueued URL is paired with a fresh
 * {@link LianjiaHtmlParser}.
 */
public class LianjiaSubpageParser extends HtmlParser<Boolean> {
    /** CSS class of the element that carries the pagination attribute. */
    private static final String PAGE_BOX_CLASS = "house-lst-page-box";
    /** Attribute on the pagination element holding a JSON object as text. */
    private static final String PAGE_DATA_ATTR = "page-data";
    /** Key inside the pagination JSON that holds the number of pages. */
    private static final String TOTAL_PAGE_KEY = "totalPage";
    /** Prefix of every page URL; the page number and a trailing slash are appended. */
    private static final String PAGE_URL_PREFIX = "https://cd.lianjia.com/ershoufang/pg";

    /**
     * Reads the {@code page-data} attribute of the {@code house-lst-page-box}
     * element, extracts {@code totalPage} from it and sends one {@link UrlResult}
     * per page (1..totalPage inclusive) to {@code urlProducer}.
     *
     * @param webResult   fetched page; its data is cast to {@link HttpBody}
     * @param urlProducer sink that receives the generated page URLs
     * @return always {@code null}, both on success and on failure; the return
     *         value carries no information for callers
     */
    @Override
    public Boolean parse(WebResult webResult, UrlFetcherEventProducer urlProducer) {
        try {
            if (webResult == null) {
                System.err.println("LianjiaSubpageParser: webResult is null, nothing to parse");
                return null;
            }

            // The signature is raw (dictated by the overridden method), so the cast is unchecked.
            @SuppressWarnings("unchecked")
            HttpBody htmlBody = ((WebResult<HttpBody>) webResult).getData();
            if (htmlBody == null) {
                System.err.println("LianjiaSubpageParser: response has no body, nothing to parse");
                return null;
            }

            Document doc = Jsoup.parse(htmlBody.getContentString());
            doc.charset(Charset.forName("UTF-8"));
            Element body = doc.body();

            Elements pageDiv = body.getElementsByClass(PAGE_BOX_CLASS);
            String pageJsonStr = pageDiv.attr(PAGE_DATA_ATTR);
            // Guard explicitly instead of relying on a NullPointerException further down
            // when the pagination element or its attribute is absent.
            if (pageJsonStr == null || pageJsonStr.trim().isEmpty()) {
                System.err.println("LianjiaSubpageParser: no '" + PAGE_DATA_ATTR
                        + "' attribute found on '" + PAGE_BOX_CLASS + "' element");
                return null;
            }

            JSONObject pageJSON = JSONObject.parseObject(pageJsonStr);
            if (pageJSON == null) {
                System.err.println("LianjiaSubpageParser: '" + PAGE_DATA_ATTR
                        + "' did not contain a JSON object: " + pageJsonStr);
                return null;
            }

            int totalPage = pageJSON.getIntValue(TOTAL_PAGE_KEY);
            for (int page = 1; page <= totalPage; page++) {
                // A new parser instance per URL, as in the original implementation.
                urlProducer.sendData(new UrlResult(PAGE_URL_PREFIX + page + "/", new LianjiaHtmlParser()));
            }
            return null;
        } catch (Exception e) {
            // Boundary catch: malformed JSON or producer failures must not propagate to the caller.
            e.printStackTrace();
            return null;
        }
    }
}
